import jsonlines
import sys

# Positional command-line arguments: first the input JSON Lines file to read,
# then the output JSON Lines file to write.
path, o_path = sys.argv[1], sys.argv[2]

def is_chinese(uchar):
    """Return True when ``uchar`` lies in the range U+4E00..U+9FA5 (inclusive).

    The check is a plain string comparison against the two bounds, so it is
    meant to be called with a single character.

    NOTE(review): characters outside this range (for example CJK punctuation
    or code points above U+9FA5) are reported as non-Chinese -- confirm that
    this is the intended coverage.
    """
    return u'\u4e00' <= uchar <= u'\u9fa5'

def filter_line(line, threshold=0.2):
    """Decide whether a record should be dropped because of Chinese content.

    Every entry of ``line["conversations"]`` is judged on its own: the share
    of characters in its ``"value"`` for which ``is_chinese`` returns True is
    compared against ``threshold``.

    Args:
        line: Mapping with a ``"conversations"`` iterable whose items each
            provide a ``"value"`` string.
        threshold: Ratio of Chinese characters that a single turn must
            strictly exceed for the record to be filtered. Defaults to 0.2.

    Returns:
        True if any turn has an empty ``"value"`` or a Chinese-character
        ratio strictly greater than ``threshold``; False otherwise, including
        when ``"conversations"`` is empty.

    Raises:
        KeyError: If ``"conversations"`` or a turn's ``"value"`` is missing.
    """
    for conv in line["conversations"]:
        text = conv["value"]
        all_count = len(text)
        # An empty turn marks the record as filtered; testing this first also
        # keeps the ratio below from dividing by zero.
        if all_count == 0:
            return True
        filter_count = sum(1 for char in text if is_chinese(char))
        if filter_count / all_count > threshold:
            return True
    return False

from tqdm import tqdm

# Read pass: stream the input file through tqdm for progress reporting and
# keep only the records that filter_line does not reject. All kept records
# are held in memory until the input has been fully read.
with jsonlines.open(path) as reader:
    datas = [record for record in tqdm(reader) if not filter_line(record)]


# Write pass: emit the surviving records to the output file in input order.
with jsonlines.open(o_path,"w") as writer:
    for record in datas:
        writer.write(record)
